## Download the PlayersBBall.csv dataset into RStudio. This dataset covers
## every NBA basketball player from 1950 to the present. It contains their
## height, weight, position, and the years they played (among other data).
## (Position: F = Forward, C = Center, F-C and C-F = Forward/Center,
## G = Guard, F-G = Forward/Guard.)
## FYI: If you feel that these questions are open-ended or at least a little
## vague, this is on purpose. Answer each question as you understand it, make
## any assumptions you need to answer it, and record those assumptions.
## (3-5 hours)
# Read the player table from the CSV in the working directory, then preview
# its first rows.
hello <- read.csv(file = "PlayersBBall.csv")
head(x = hello)
## name year_start year_end position height weight
## 1 Alaa Abdelnaby 1991 1995 F-C 6-10 240
## 2 Zaid Abdul-Aziz 1969 1978 C-F 6-9 235
## 3 Kareem Abdul-Jabbar 1970 1989 C 7-2 225
## 4 Mahmoud Abdul-Rauf 1991 2001 G 6-1 162
## 5 Tariq Abdul-Wahad 1998 2003 F 6-6 223
## 6 Shareef Abdur-Rahim 1997 2008 F 6-9 225
## birth_date college
## 1 June 24, 1968 Duke University
## 2 April 7, 1946 Iowa State University
## 3 April 16, 1947 University of California, Los Angeles
## 4 March 9, 1969 Louisiana State University
## 5 November 3, 1974 San Jose State University
## 6 December 11, 1976 University of California
# Column-by-column overview of the full player table.
summary(object = hello)
## name year_start year_end position
## Length:4550 Min. :1947 Min. :1947 Length:4550
## Class :character 1st Qu.:1969 1st Qu.:1973 Class :character
## Mode :character Median :1986 Median :1992 Mode :character
## Mean :1985 Mean :1989
## 3rd Qu.:2003 3rd Qu.:2009
## Max. :2018 Max. :2018
##
## height weight birth_date college
## Length:4550 Min. :114.0 Length:4550 Length:4550
## Class :character 1st Qu.:190.0 Class :character Class :character
## Mode :character Median :210.0 Mode :character Mode :character
## Mean :208.9
## 3rd Qu.:225.0
## Max. :360.0
## NA's :6
#Use the dataset to visually investigate if the distribution of the weight of centers (C) is greater than the distribution of the weight of forwards (F).
# Histogram of weight across all players, regardless of position.
# String arguments must be delimited by plain ASCII double quotes; the
# typographic quotes previously used here are not valid R syntax.
hist(hello$weight, main = "weight of all players", col = "blue")
# Centers: keep only the rows whose position is exactly "C", then preview them.
centers <- hello[hello$position %in% "C", ]
head(x = centers)
## name year_start year_end position height weight
## 3 Kareem Abdul-Jabbar 1970 1989 C 7-2 225
## 22 Steven Adams 2014 2018 C 7-0 255
## 33 Alexis Ajinca 2009 2017 C 7-2 248
## 36 Solomon Alabi 2011 2012 C 7-1 251
## 38 Gary Alcorn 1960 1961 C 6-9 225
## 40 Cole Aldrich 2011 2018 C 6-11 250
## birth_date college
## 3 April 16, 1947 University of California, Los Angeles
## 22 July 20, 1993 University of Pittsburgh
## 33 May 6, 1988
## 36 March 21, 1988 Florida State University
## 38 October 8, 1936 California State University, Fresno
## 40 October 31, 1988 University of Kansas
# Column-by-column overview of the centers-only table.
summary(object = centers)
## name year_start year_end position
## Length:502 Min. :1947 Min. :1947 Length:502
## Class :character 1st Qu.:1972 1st Qu.:1976 Class :character
## Mode :character Median :1993 Median :1998 Mode :character
## Mean :1988 Mean :1993
## 3rd Qu.:2004 3rd Qu.:2009
## Max. :2018 Max. :2018
##
## height weight birth_date college
## Length:502 Min. :190.0 Length:502 Length:502
## Class :character 1st Qu.:230.0 Class :character Class :character
## Mode :character Median :245.0 Mode :character Mode :character
## Mean :244.6
## 3rd Qu.:256.0
## Max. :360.0
## NA's :2
# Weight distribution of centers.
hist(x = centers$weight, col = "blue", main = "weight of centers")
# Forwards: keep only the rows whose position is exactly "F", then preview them.
forwards <- hello[hello$position %in% "F", ]
head(x = forwards)
## name year_start year_end position height weight
## 5 Tariq Abdul-Wahad 1998 2003 F 6-6 223
## 6 Shareef Abdur-Rahim 1997 2008 F 6-9 225
## 7 Tom Abernethy 1977 1981 F 6-7 220
## 9 John Abramovic 1947 1948 F 6-3 195
## 14 Bud Acton 1968 1968 F 6-6 210
## 15 Quincy Acy 2013 2018 F 6-7 240
## birth_date college
## 5 November 3, 1974 San Jose State University
## 6 December 11, 1976 University of California
## 7 May 6, 1954 Indiana University
## 9 February 9, 1919 Salem International University
## 14 January 11, 1942 Hillsdale College
## 15 October 6, 1990 Baylor University
# Column-by-column overview of the forwards-only table.
summary(object = forwards)
## name year_start year_end position
## Length:1290 Min. :1947 Min. :1947 Length:1290
## Class :character 1st Qu.:1971 1st Qu.:1973 Class :character
## Mode :character Median :1989 Median :1992 Mode :character
## Mean :1987 Mean :1990
## 3rd Qu.:2004 3rd Qu.:2010
## Max. :2018 Max. :2018
##
## height weight birth_date college
## Length:1290 Min. :165.0 Length:1290 Length:1290
## Class :character 1st Qu.:205.0 Class :character Class :character
## Mode :character Median :219.0 Mode :character Mode :character
## Mean :218.1
## 3rd Qu.:230.0
## Max. :285.0
## NA's :1
# Weight distribution of forwards.
hist(x = forwards$weight, col = "green", main = "weight of forwards")
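## Open question: how can two histograms be drawn on the same plot
## (for example with a second hist(..., add = TRUE) call)?
## Open question: should the combined C-F position be considered as well?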
#Use the dataset to visually investigate if the distribution of the height of centers (C) is greater than the distribution of the height of forwards (F).
# install.packages("tidyr")
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Centers: split each "feet-inches" height string into "ft" and "inch" columns.
df <- centers %>%
  separate(col = height, into = c("ft", "inch"), sep = "\\-")
# Feet component as numbers; preview the first few values.
ft_numeric <- as.numeric(df[["ft"]])
head(ft_numeric)
## [1] 7 7 7 7 6 6
# Inches component as numbers.
inch_numeric <- as.numeric(df[["inch"]])
# Histogram of total height in inches (12 inches per foot).
hist(ft_numeric * 12 + inch_numeric, col = "blue", main = "height of centers (inch)")
# Forwards: split each "feet-inches" height string into "ft" and "inch" columns.
dff <- forwards %>%
  separate(col = height, into = c("ft", "inch"), sep = "\\-")
# Feet component as numbers; preview the first few values.
ft_numeric <- as.numeric(dff[["ft"]])
head(ft_numeric)
## [1] 6 6 6 6 6 6
# Inches component as numbers; preview the first few values.
inch_numeric <- as.numeric(dff[["inch"]])
head(inch_numeric)
## [1] 6 9 7 3 6 7
# Histogram of total height in inches (12 inches per foot).
hist(ft_numeric * 12 + inch_numeric, col = "green", main = "height of forwards (inch)")
##Use the dataset to visually investigate if the distribution of height is different between any of the positions.
# install.packages("dplyr")
# library(dplyr)
# head(hello$position)
# count(hello, position)
# C = 502, F = 1290, G = 1574; let's check the height of G
# Guards: keep only the rows whose position is exactly "G".
guards <- hello[hello$position %in% "G", ]
# Split each "feet-inches" height string into "ft" and "inch" columns.
dfff <- guards %>%
  separate(col = height, into = c("ft", "inch"), sep = "\\-")
# Feet component as numbers; preview the first few values.
ft_numeric <- as.numeric(dfff[["ft"]])
head(ft_numeric)
## [1] 6 6 6 6 6 6
# Inches component as numbers; preview the first few values.
inch_numeric <- as.numeric(dfff[["inch"]])
head(inch_numeric)
## [1] 1 3 5 0 4 5
# Histogram of total height in inches (12 inches per foot).
hist(ft_numeric * 12 + inch_numeric, col = "red", main = "height of guard (inch)")
## Conclusion (height): guards are shortest and centers tallest, i.e. G < F < C